\relax 
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax 
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}基于多智能体强化学习的智能空战}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}多智能体强化学习之Dec-POMDP形式建模}{1}{subsection.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}关键理论问题}{2}{subsection.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.2.1}部分可观测性}{2}{subsubsection.1.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.2.2}信用分配问题}{2}{subsubsection.1.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}经典算法理论分析}{2}{subsection.1.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.3.1}值分解方法}{2}{subsubsection.1.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.3.2}策略梯度方法}{3}{subsubsection.1.3.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces MARL算法理论特性对比\relax }}{3}{table.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:marl-theory}{{1}{3}{MARL算法理论特性对比\relax }{table.caption.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}多智能体强化学习用于多机空战}{3}{subsection.1.4}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces 3v3空战的典型钳形进攻策略\relax }}{4}{figure.caption.2}\protected@file@percent }
\newlabel{fig:3v3空战的典型钳形进攻策略}{{1}{4}{3v3空战的典型钳形进攻策略\relax }{figure.caption.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces 基于MAPPO的多机对抗典型架构\relax }}{4}{figure.caption.3}\protected@file@percent }
\newlabel{fig:基于MAPPO的多机对抗典型架构}{{2}{4}{基于MAPPO的多机对抗典型架构\relax }{figure.caption.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2}大语言模型在空战等多智能体强化学习中的应用综述}{5}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}基于大语言模型的强化学习研究进展}{5}{subsection.2.1}\protected@file@percent }
\citation{221003629ReActSynergizing}
\citation{ReflexionLanguageAgents}
\citation{ADaPTAsneededDecomposition}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces 用LLM来增强RL的典型应用\relax }}{6}{figure.caption.4}\protected@file@percent }
\newlabel{fig:用LLM来增强RL的典型应用}{{3}{6}{用LLM来增强RL的典型应用\relax }{figure.caption.4}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.1.1}开环LLM决策框架}{6}{subsubsection.2.1.1}\protected@file@percent }
\citation{230401904REFINERReasoning}
\citation{zhangSimpleFrameworkIntrinsic}
\citation{nREXRapidExploration2023}
\citation{hanLargeLanguageModel2024}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.1.2}闭环LLM强化学习框架}{7}{subsubsection.2.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}智能空战场景下借助LLM进行飞行控制}{7}{subsection.2.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces LLM引导的强化学习框架.\relax }}{8}{figure.caption.5}\protected@file@percent }
\newlabel{fig:LLM引导的强化学习框架}{{4}{8}{LLM引导的强化学习框架.\relax }{figure.caption.5}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces 加入LLM后的平均回合奖励.\relax }}{8}{figure.caption.6}\protected@file@percent }
\newlabel{fig:加入LLM后的平均回合奖励}{{5}{8}{加入LLM后的平均回合奖励.\relax }{figure.caption.6}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.1}LLM驱动的先验知识嵌入机制}{9}{subsubsection.2.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.2}动态奖励函数与耦合控制优化}{9}{subsubsection.2.2.2}\protected@file@percent }
\newlabel{复合奖励函数}{{5}{9}{动态奖励函数与耦合控制优化}{equation.2.5}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.3}分层引导的训练架构}{9}{subsubsection.2.2.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces 水平飞行速度跟踪误差.\relax }}{10}{figure.caption.7}\protected@file@percent }
\newlabel{fig:水平飞行速度跟踪误差}{{6}{10}{水平飞行速度跟踪误差.\relax }{figure.caption.7}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.2.4}复杂战术动作的泛化能力验证}{10}{subsubsection.2.2.4}\protected@file@percent }
\newlabel{fig:immelmann-main}{{7a}{11}{整体仿真结果\relax }{figure.caption.8}{}}
\newlabel{sub@fig:immelmann-main}{{a}{11}{整体仿真结果\relax }{figure.caption.8}{}}
\newlabel{fig:immelmann-yaw}{{7b}{11}{偏航角误差\relax }{figure.caption.8}{}}
\newlabel{sub@fig:immelmann-yaw}{{b}{11}{偏航角误差\relax }{figure.caption.8}{}}
\newlabel{fig:immelmann-altitude}{{7c}{11}{海拔高度误差\relax }{figure.caption.8}{}}
\newlabel{sub@fig:immelmann-altitude}{{c}{11}{海拔高度误差\relax }{figure.caption.8}{}}
\newlabel{fig:immelmann-velocity}{{7d}{11}{X方向速度误差\relax }{figure.caption.8}{}}
\newlabel{sub@fig:immelmann-velocity}{{d}{11}{X方向速度误差\relax }{figure.caption.8}{}}
\newlabel{fig:immelmann-roll}{{7e}{11}{滚转角误差\relax }{figure.caption.8}{}}
\newlabel{sub@fig:immelmann-roll}{{e}{11}{滚转角误差\relax }{figure.caption.8}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces 伊梅尔曼回转仿真结果分析\relax }}{11}{figure.caption.8}\protected@file@percent }
\newlabel{fig:immelmann-simulation}{{7}{11}{伊梅尔曼回转仿真结果分析\relax }{figure.caption.8}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces 环形机动X方向速度误差.\relax }}{11}{figure.caption.9}\protected@file@percent }
\newlabel{fig:环形机动X方向速度误差}{{8}{11}{环形机动X方向速度误差.\relax }{figure.caption.9}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces 水平飞行仿真结果.\relax }}{12}{figure.caption.10}\protected@file@percent }
\newlabel{fig:水平飞行仿真结果}{{9}{12}{水平飞行仿真结果.\relax }{figure.caption.10}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces 水平飞行滚转误差.\relax }}{12}{figure.caption.11}\protected@file@percent }
\newlabel{fig:水平飞行滚转误差}{{10}{12}{水平飞行滚转误差.\relax }{figure.caption.11}{}}
\citation{shaoSwarmBrainEmbodiedAgent2024}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}LLM在典型多智能体强化学习场景中的应用综述}{13}{subsection.2.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.1}LLM在多智能体强化学习领域的最新应用综述}{13}{subsubsection.2.3.1}\protected@file@percent }
\citation{fengNaturalLanguageReinforcement2024}
\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces SwarmBrain与星际争霸II环境交互框架\relax }}{14}{figure.caption.12}\protected@file@percent }
\newlabel{fig:SwarmBrain与星际争霸II环境交互框架}{{11}{14}{SwarmBrain与星际争霸II环境交互框架\relax }{figure.caption.12}{}}
\newlabel{fig:对抗五种不同难度设置的获胜次数}{{12a}{14}{整体仿真结果\relax }{figure.caption.13}{}}
\newlabel{sub@fig:对抗五种不同难度设置的获胜次数}{{a}{14}{整体仿真结果\relax }{figure.caption.13}{}}
\newlabel{fig:对抗五种不同难度设置的平均获胜时间}{{12b}{14}{对抗五种不同难度设置中平均获胜时间\relax }{figure.caption.13}{}}
\newlabel{sub@fig:对抗五种不同难度设置的平均获胜时间}{{b}{14}{对抗五种不同难度设置中平均获胜时间\relax }{figure.caption.13}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces 对抗不同难度对手的胜率和平均比赛时间\relax }}{14}{figure.caption.13}\protected@file@percent }
\newlabel{fig:对抗不同难度对手的胜率和平均比赛时间}{{12}{14}{对抗不同难度对手的胜率和平均比赛时间\relax }{figure.caption.13}{}}
\citation{zhangEfficientLLMGrounding2024}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces NLRL框架示意图：将强化学习组件映射至自然语言空间，并通过LLM实现策略生成、价值评估与信息聚合\relax }}{15}{figure.caption.14}\protected@file@percent }
\newlabel{fig:NLRL}{{13}{15}{NLRL框架示意图：将强化学习组件映射至自然语言空间，并通过LLM实现策略生成、价值评估与信息聚合\relax }{figure.caption.14}{}}
\citation{maLargeLanguageModels2024}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Frozen-Lake环境中NLRL策略迭代性能对比\relax }}{16}{table.caption.15}\protected@file@percent }
\newlabel{tab:frozen_results}{{2}{16}{Frozen-Lake环境中NLRL策略迭代性能对比\relax }{table.caption.15}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces RoCo方法和作者方法的决策流程对比\relax }}{16}{figure.caption.16}\protected@file@percent }
\newlabel{RoCo方法和作者方法的决策流程对比}{{14}{16}{RoCo方法和作者方法的决策流程对比\relax }{figure.caption.16}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Prompt和细化提示的概述\relax }}{16}{figure.caption.17}\protected@file@percent }
\newlabel{Prompt和细化提示的概述}{{15}{16}{Prompt和细化提示的概述\relax }{figure.caption.17}{}}
\citation{yimEvaluatingEnhancingLLMs2024}
\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces CoS增强的LLM方法与Text版星际争霸II交互的结构图\relax }}{17}{figure.caption.18}\protected@file@percent }
\newlabel{CoS增强的LLM方法与Text版星际争霸II交互的结构图}{{16}{17}{CoS增强的LLM方法与Text版星际争霸II交互的结构图\relax }{figure.caption.18}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces 构建防御结构和提前预测危险的能力：Alphastar vs. LLM代理\relax }}{18}{figure.caption.19}\protected@file@percent }
\newlabel{构建防御结构和提前预测危险的能力：Alphastar vs. LLM代理}{{17}{18}{构建防御结构和提前预测危险的能力：Alphastar vs. LLM代理\relax }{figure.caption.19}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces 加入ToM后 LLM代理的操作流程\relax }}{18}{figure.caption.20}\protected@file@percent }
\newlabel{加入ToM后 LLM代理的操作流程}{{18}{18}{加入ToM后 LLM代理的操作流程\relax }{figure.caption.20}{}}
\citation{kannanSMARTLLMSmartMultiagent2024}
\@writefile{lof}{\contentsline {figure}{\numberline {19}{\ignorespaces 各种LLM Agents对抗随机、基于规则和Danzero+对手的平均表现\relax }}{19}{figure.caption.21}\protected@file@percent }
\newlabel{各种LLM对抗随机、基于规则和Danzero+对手的平均表现}{{19}{19}{各种LLM Agents对抗随机、基于规则和Danzero+对手的平均表现\relax }{figure.caption.21}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {20}{\ignorespaces SMART LLM多具身智能的框架总览\relax }}{20}{figure.caption.22}\protected@file@percent }
\newlabel{SMART LLM多具身智能的框架总览}{{20}{20}{SMART LLM多具身智能的框架总览\relax }{figure.caption.22}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {21}{\ignorespaces SMART LLM的四个关键阶段\relax }}{21}{figure.caption.23}\protected@file@percent }
\newlabel{SMART LLM的四个关键阶段}{{21}{21}{SMART LLM的四个关键阶段\relax }{figure.caption.23}{}}
\citation{yuCoNavGPTMultirobotCooperative2023}
\@writefile{lof}{\contentsline {figure}{\numberline {22}{\ignorespaces SMARTLLM和基线在AI2-THOR模拟器中的指标对比\relax }}{22}{figure.caption.24}\protected@file@percent }
\newlabel{SMARTLLM和基线在AI2-THOR模拟器中的指标对比}{{22}{22}{SMARTLLM和基线在AI2-THOR模拟器中的指标对比\relax }{figure.caption.24}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {23}{\ignorespaces SMARTLLM消融实验\relax }}{23}{figure.caption.25}\protected@file@percent }
\newlabel{SMARTLLM消融实验}{{23}{23}{SMARTLLM消融实验\relax }{figure.caption.25}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {24}{\ignorespaces 真实机器人实验：a）机器人团队和要巡逻的区域；b）机器人在任务规划和巡逻后根据能见度区域分配的各自区域\relax }}{23}{figure.caption.26}\protected@file@percent }
\newlabel{SMART LLM真实机器人实验}{{24}{23}{真实机器人实验：a）机器人团队和要巡逻的区域；b）机器人在任务规划和巡逻后根据能见度区域分配的各自区域\relax }{figure.caption.26}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {25}{\ignorespaces 目标导航框架的总体架构\relax }}{24}{figure.caption.27}\protected@file@percent }
\newlabel{目标导航框架的总体架构}{{25}{24}{目标导航框架的总体架构\relax }{figure.caption.27}{}}
\citation{chenMultiagentConsensusSeeking2025}
\@writefile{lof}{\contentsline {figure}{\numberline {26}{\ignorespaces 从局部地图中提取场景对象和墙壁示例\relax }}{25}{figure.caption.28}\protected@file@percent }
\newlabel{从局部地图中提取场景对象和墙壁}{{26}{25}{从局部地图中提取场景对象和墙壁示例\relax }{figure.caption.28}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {27}{\ignorespaces 两个机器人寻找椅子的导航过程\relax }}{25}{figure.caption.29}\protected@file@percent }
\newlabel{两个机器人寻找椅子的导航过程}{{27}{25}{两个机器人寻找椅子的导航过程\relax }{figure.caption.29}{}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces 多机器人导航性能对比\relax }}{25}{table.caption.30}\protected@file@percent }
\newlabel{tab:results}{{3}{25}{多机器人导航性能对比\relax }{table.caption.30}{}}
\citation{xuExploringLargeLanguage2024}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces 不同智能体数量下的共识值统计\relax }}{26}{table.caption.34}\protected@file@percent }
\newlabel{tab:stats}{{4}{26}{不同智能体数量下的共识值统计\relax }{table.caption.34}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {28}{\ignorespaces 两个代理谈判过程的图示\relax }}{27}{figure.caption.31}\protected@file@percent }
\newlabel{两个代理谈判过程的图示}{{28}{27}{两个代理谈判过程的图示\relax }{figure.caption.31}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {29}{\ignorespaces 成功达成共识的示例\relax }}{28}{figure.caption.32}\protected@file@percent }
\newlabel{成功达成共识的示例}{{29}{28}{成功达成共识的示例\relax }{figure.caption.32}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {30}{\ignorespaces 在多机器人聚合中的应用\relax }}{29}{figure.caption.33}\protected@file@percent }
\newlabel{在多机器人聚合中的应用}{{30}{29}{在多机器人聚合中的应用\relax }{figure.caption.33}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {31}{\ignorespaces 最终共识值的统计结果\relax }}{30}{figure.caption.35}\protected@file@percent }
\newlabel{最终共识值的统计结果}{{31}{30}{最终共识值的统计结果\relax }{figure.caption.35}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {32}{\ignorespaces 狼人杀各职业信任关系表\relax }}{30}{figure.caption.36}\protected@file@percent }
\newlabel{狼人杀各职业信任关系表}{{32}{30}{狼人杀各职业信任关系表\relax }{figure.caption.36}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {33}{\ignorespaces MultiAgent狼人杀游戏的具体实现\relax }}{31}{figure.caption.37}\protected@file@percent }
\newlabel{MultiAgent狼人杀游戏的具体实现}{{33}{31}{MultiAgent狼人杀游戏的具体实现\relax }{figure.caption.37}{}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces 消融实验：模块对输出合理性的影响\relax }}{31}{table.caption.39}\protected@file@percent }
\newlabel{tab:ablation}{{5}{31}{消融实验：模块对输出合理性的影响\relax }{table.caption.39}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {34}{\ignorespaces 响应生成的Prompt大纲\relax }}{32}{figure.caption.38}\protected@file@percent }
\newlabel{响应生成的Prompt大纲}{{34}{32}{响应生成的Prompt大纲\relax }{figure.caption.38}{}}
\citation{lightStrategistLearningStrategic2024}
\citation{liuLanguagedrivenPolicyDistillation2024}
\@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces 村民阵营胜率与游戏时长对比\relax }}{33}{table.caption.40}\protected@file@percent }
\newlabel{tab:win_rate}{{6}{33}{村民阵营胜率与游戏时长对比\relax }{table.caption.40}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {35}{\ignorespaces w/o经验中学习的效果对比\relax }}{34}{figure.caption.41}\protected@file@percent }
\newlabel{w/o经验中学习的效果对比}{{35}{34}{w/o经验中学习的效果对比\relax }{figure.caption.41}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {36}{\ignorespaces STRATEGIST概览\relax }}{35}{figure.caption.42}\protected@file@percent }
\newlabel{STRATEGIST概览}{{36}{35}{STRATEGIST概览\relax }{figure.caption.42}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {37}{\ignorespaces 不同训练方法在Avalon和GOPS中相对于基线的性能\relax }}{35}{figure.caption.44}\protected@file@percent }
\newlabel{不同训练方法在Avalon和GOPS中相对于基线的性能}{{37}{35}{不同训练方法在Avalon和GOPS中相对于基线的性能\relax }{figure.caption.44}{}}
\citation{pangKALMKnowledgeableAgents2024}
\@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces 不同自优化方法在GOPS和Avalon中的表现对比\relax }}{36}{table.caption.43}\protected@file@percent }
\newlabel{tab:comparison}{{7}{36}{不同自优化方法在GOPS和Avalon中的表现对比\relax }{table.caption.43}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {38}{\ignorespaces LDPD框架\relax }}{36}{figure.caption.45}\protected@file@percent }
\newlabel{LDPD框架}{{38}{36}{LDPD框架\relax }{figure.caption.45}{}}
\@writefile{lot}{\contentsline {table}{\numberline {8}{\ignorespaces 不同方法在匝道合流场景下的性能对比（困难模式）\relax }}{36}{table.caption.46}\protected@file@percent }
\newlabel{tab:performance}{{8}{36}{不同方法在匝道合流场景下的性能对比（困难模式）\relax }{table.caption.46}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {39}{\ignorespaces 场景1中困难模式的指标对比\relax }}{37}{figure.caption.47}\protected@file@percent }
\newlabel{场景1中困难模式的指标对比}{{39}{37}{场景1中困难模式的指标对比\relax }{figure.caption.47}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {40}{\ignorespaces KALM方法框架：(1) \textbf  {LLM基础训练阶段}：通过监督微调使LLM理解环境状态与动作的数值表示；(2) \textbf  {虚拟轨迹生成阶段}：基于目标导向提示生成未见任务的轨迹；(3) \textbf  {离线强化学习阶段}：结合真实数据与虚拟轨迹训练策略。图中虚线表示可选的在线迭代优化流程。\relax }}{38}{figure.caption.48}\protected@file@percent }
\newlabel{fig:framework}{{40}{38}{KALM方法框架：(1) \textbf {LLM基础训练阶段}：通过监督微调使LLM理解环境状态与动作的数值表示；(2) \textbf {虚拟轨迹生成阶段}：基于目标导向提示生成未见任务的轨迹；(3) \textbf {离线强化学习阶段}：结合真实数据与虚拟轨迹训练策略。图中虚线表示可选的在线迭代优化流程。\relax }{figure.caption.48}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {41}{\ignorespaces 不同方法在CLEVR-Robot环境中的成功率对比。KALM（CQL+KALM）在语义改写任务（Rephrasing）、简单未见任务（Unseen-Easy）和复杂未见任务（Unseen-Hard）上分别达到83\%、42\%和28\%的轨迹匹配率，显著优于纯离线强化学习方法（如CQL的15.5\%）。\relax }}{38}{figure.caption.49}\protected@file@percent }
\newlabel{fig:results}{{41}{38}{不同方法在CLEVR-Robot环境中的成功率对比。KALM（CQL+KALM）在语义改写任务（Rephrasing）、简单未见任务（Unseen-Easy）和复杂未见任务（Unseen-Hard）上分别达到83\%、42\%和28\%的轨迹匹配率，显著优于纯离线强化学习方法（如CQL的15.5\%）。\relax }{figure.caption.49}{}}
\citation{dengSMACR1EmergenceIntelligence2025}
\@writefile{lof}{\contentsline {figure}{\numberline {42}{\ignorespaces SMAC-R1框架流程：(a) \textbf  {策略规划}：Planner模块生成战术框架；(b) \textbf  {代码生成}：Coder模块转化为Python代码；(c) \textbf  {反馈优化}：Critic模块基于环境奖励优化策略；(d) \textbf  {模型蒸馏}：通过SFT和GRPO训练轻量级模型。\relax }}{39}{figure.caption.50}\protected@file@percent }
\newlabel{fig:framework}{{42}{39}{SMAC-R1框架流程：(a) \textbf {策略规划}：Planner模块生成战术框架；(b) \textbf {代码生成}：Coder模块转化为Python代码；(c) \textbf {反馈优化}：Critic模块基于环境奖励优化策略；(d) \textbf {模型蒸馏}：通过SFT和GRPO训练轻量级模型。\relax }{figure.caption.50}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {43}{\ignorespaces 不同方法在SMAC任务中的表现对比。SMAC-R1在简单任务（3m）和复杂任务（MMM2）中的代码生成轮次分别降至1.4轮和3.6轮，胜率达100\%，显著优于DeepSeek-236B（需15-35轮）。\relax }}{40}{figure.caption.51}\protected@file@percent }
\newlabel{fig:results}{{43}{40}{不同方法在SMAC任务中的表现对比。SMAC-R1在简单任务（3m）和复杂任务（MMM2）中的代码生成轮次分别降至1.4轮和3.6轮，胜率达100\%，显著优于DeepSeek-236B（需15-35轮）。\relax }{figure.caption.51}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {44}{\ignorespaces 策略迁移示例：(a) "3s\_vs\_4z"任务生成的三角阵型策略；(b) 同一策略在"3s\_vs\_5z"任务中的直接应用效果。红色箭头表示集火目标，蓝色虚线为风筝路径。\relax }}{40}{figure.caption.52}\protected@file@percent }
\newlabel{fig:tactics}{{44}{40}{策略迁移示例：(a) "3s\_vs\_4z"任务生成的三角阵型策略；(b) 同一策略在"3s\_vs\_5z"任务中的直接应用效果。红色箭头表示集火目标，蓝色虚线为风筝路径。\relax }{figure.caption.52}{}}
\citation{weiLEROLLMdrivenEvolutionary2025}
\@writefile{lof}{\contentsline {figure}{\numberline {45}{\ignorespaces LERO与基线方法的性能对比\relax }}{41}{figure.caption.53}\protected@file@percent }
\newlabel{fig:performance}{{45}{41}{LERO与基线方法的性能对比\relax }{figure.caption.53}{}}
\newlabel{eq:hybrid_reward}{{6}{41}{LLM在多智能体强化学习领域的最新应用综述}{equation.2.6}{}}
\bibstyle{gbt7714-numerical}
\bibdata{F:/BibTeXref/zoterorepo.bib}
\bibcite{221003629ReActSynergizing}{{1}{[2025]}{{221}}{{}}}
\bibcite{ReflexionLanguageAgents}{{2}{[2025]}{{Ref}}{{}}}
\bibcite{ADaPTAsneededDecomposition}{{3}{[2025]}{{ADa}}{{}}}
\@writefile{lof}{\contentsline {figure}{\numberline {46}{\ignorespaces LERO框架的迭代优化流程\relax }}{42}{figure.caption.54}\protected@file@percent }
\newlabel{fig:framework}{{46}{42}{LERO框架的迭代优化流程\relax }{figure.caption.54}{}}
\bibcite{230401904REFINERReasoning}{{4}{[2025]}{{230}}{{}}}
\bibcite{zhangSimpleFrameworkIntrinsic}{{5}{}{{Zhang et~al.}}{{Zhang, Parashar, and Saha}}}
\bibcite{nREXRapidExploration2023}{{6}{2023}{{N et~al.}}{{N, Heinecke, Niebles, Liu, Xue, Yao, Feng, Chen, Gokul, Arpit, Xu, Mui, Wang, Xiong, and Savarese}}}
\bibcite{hanLargeLanguageModel2024}{{7}{2024}{{Han et~al.}}{{Han, Yang, Ren, and Li}}}
\bibcite{shaoSwarmBrainEmbodiedAgent2024}{{8}{2024}{{Shao et~al.}}{{Shao, Jiang, Zuo, and Liu}}}
\bibcite{fengNaturalLanguageReinforcement2024}{{9}{2024}{{Feng et~al.}}{{Feng, Wan, Yang, Wang, Koushik, Du, Wen, and Wang}}}
\bibcite{zhangEfficientLLMGrounding2024}{{10}{2024}{{Zhang et~al.}}{{Zhang, Yang, Bai, Wu, Li, Wang, and Li}}}
\@writefile{lof}{\contentsline {figure}{\numberline {47}{\ignorespaces 消融实验\relax }}{43}{figure.caption.55}\protected@file@percent }
\newlabel{fig:ablation}{{47}{43}{消融实验\relax }{figure.caption.55}{}}
\bibcite{maLargeLanguageModels2024}{{11}{2024}{{Ma et~al.}}{{Ma, Mi, Zeng, Yan, Wu, Lin, Zhang, and Wang}}}
\bibcite{yimEvaluatingEnhancingLLMs2024}{{12}{2024}{{Yim et~al.}}{{Yim, Chan, Shi, Deng, Fan, Zheng, and Song}}}
\bibcite{kannanSMARTLLMSmartMultiagent2024}{{13}{2024}{{Kannan et~al.}}{{Kannan, Venkatesh, and Min}}}
\bibcite{yuCoNavGPTMultirobotCooperative2023}{{14}{2023}{{Yu et~al.}}{{Yu, Kasaei, and Cao}}}
\bibcite{chenMultiagentConsensusSeeking2025}{{15}{2025}{{Chen et~al.}}{{Chen, Ji, Xu, and Zhao}}}
\bibcite{xuExploringLargeLanguage2024}{{16}{2024}{{Xu et~al.}}{{Xu, Wang, Li, Luo, Wang, Liu, and Liu}}}
\bibcite{lightStrategistLearningStrategic2024}{{17}{2024}{{Light et~al.}}{{Light, Cai, Chen, Wang, Chen, Cheng, Yue, and Hu}}}
\bibcite{liuLanguagedrivenPolicyDistillation2024}{{18}{2024}{{Liu et~al.}}{{Liu, Xu, Hang, Sun, Ding, Zhan, and Tomizuka}}}
\bibcite{pangKALMKnowledgeableAgents2024}{{19}{2024}{{Pang et~al.}}{{Pang, Yang, Li, Zhang, Chen, Tang, and Yu}}}
\bibcite{dengSMACR1EmergenceIntelligence2025}{{20}{2025}{{Deng et~al.}}{{Deng, Ma, Fan, Song, Zhang, Zhang, and Zhao}}}
\bibcite{weiLEROLLMdrivenEvolutionary2025}{{21}{2025}{{Wei et~al.}}{{Wei, Shan, and Li}}}
\gdef \@abspage@last{45}
